0.1 Introduction

This script is set to map the annotated SC dataset on to the spatial transcriptomics Visium slide. In this script we use the Visium data coming from 03-Clustering/03-clustering and the SC data comming from /scratch/devel/rmassoni/tonsil_atlas/current/scRNA-seq/results/R_objects/final_clusters/tonsil_atlas_all_cells_20210930.rds.

0.2 Libraries

library(Seurat)
library(ggpubr)
library(cowplot)
library(dplyr)
library(ggplot2)
library(stringr)
library(readr)
library(Matrix)
library(SPOTlight)

0.3 Setting parameters

Loading necessary paths and parameters

set.seed(123)
source(here::here("misc/paths.R"))
source(here::here("utils/bin.R"))

"{fig1}/{plt_dir}" %>%
  glue::glue() %>%
  here::here() %>%
  dir.create(path = .,
             showWarnings = FALSE,
             recursive = TRUE)

"{fig1}/{robj_dir}" %>%
  glue::glue() %>%
  here::here() %>%
  dir.create(path = .,
             showWarnings = FALSE,
             recursive = TRUE)

clust_vr <- "annotation_figure_1"

0.4 Load data

We have 8 different datasets that we have integrated in previous scripts - 03-clustering/03-clustering_integration.Rmds. We are going to analyze the integrated dataset all together since the regions are shared across all

# Load Tonsils integrated
sp_obj <- "misc/{robj_dir}/20220215_tonsil_atlas_spatial_seurat_obj.rds" %>%
  glue::glue() %>%
  here::here() %>%
  readRDS(file = .)

# Load single-cell dataset
sc_obj <- "misc/{robj_dir}/20220215_tonsil_atlas_rna_seurat_obj.rds" %>%
  glue::glue() %>%
  here::here() %>%
  readRDS(file = .)

0.5 Analysis

Look at cell types

flextable::flextable(data.frame(table(sc_obj@meta.data[, clust_vr])))

0.5.1 Combine cycling

sc_obj@meta.data <- sc_obj@meta.data %>%
    dplyr::mutate(
      annotation_figure_1 = dplyr::case_when(
        # Join all B cells
        annotation_figure_1 %in% c("cycling FDC", "Cycling", "cycling T", "cycling myeloid") ~ "Cycling",
        TRUE ~ annotation_figure_1))

0.5.2 Get markers

First of all we need to compute the markers for the cell types using Seurat's funtion FindAllMarkers. For the high level annotation we will compute the markers using the function FindAllMarkers.

Seurat::Idents(sc_obj) <- sc_obj@meta.data[, clust_vr]

sc_markers <- Seurat::FindAllMarkers(
  object = sc_obj,
  assay = "RNA",
  slot = "data",
  only.pos = TRUE,
  max.cells.per.ident = 500)

"{fig1}/{robj_dir}/sc_markers_fig1.rds" %>%
  glue::glue() %>%
  here::here() %>%
    saveRDS(object = sc_markers, file = .)

Filter markers and look

sc_markers <- "{fig1}/{robj_dir}/sc_markers_fig1.rds" %>%
  glue::glue() %>%
  here::here() %>%
    readRDS(file = .)

# sc_markers <- sc_markers %>%
#   dplyr::filter(avg_log2FC >= 0.5)


DT::datatable(sc_markers, filter = "top")

Make sure all the cell types have marker genes post filtering

count_df <- dplyr::count(sc_markers, cluster)
flextable::flextable(count_df)
unique(sc_obj@meta.data[, clust_vr]) %in% count_df$cluster
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

Subset SC object so all the cells from the same cell type to come from the same batch

# subset most representative sample(s)
ns <- with(sc_obj@meta.data, table(gem_id, annotation_figure_1))
ns <- as.matrix(unclass(ns))
m <- 100 # Max cells per cell type
# Extract cell barcodes
meta <- sc_obj@meta.data

id <- lapply(colnames(ns), function(nm) {
    x <- ns[, nm]
    # Initialize variables
    n <- 0    # N of cells
    s <- c()  # Gem IDs
    b <- c()  # Cell barcodes
    while (n < m & length(x) > 0) {
        # select gem id with the most cells
        i <- which.max(x)
        # Add gem id to vector s
        s <- c(s, names(i))
        # Add number of cells per cell type to n
        n <- n + x[i]
        # Remove gem id from x to move on to the next
        x <- x[-i]
        # extract barcode
        barcode <- rownames(meta[meta[, "gem_id"] == names(i) &
                            meta[, "annotation_figure_1"] == nm, ])
        # make sure it adds up to m
        # print(barcode)
        if (length(b) + length(barcode) > m) {
          barcode <- sample(x = barcode, size = m - length(b), replace = FALSE)
        }
        b <- c(b, barcode)  # Cell barcodes
        # print(b)
        
    }
    return(b)
})

sc_sub <- sc_obj[, unlist(id)]

0.5.3 Pre-deconv checks

Remove empty genes

table(Matrix::rowSums(sp_obj@assays$Spatial@counts) == 0)
## 
## FALSE  TRUE 
## 26759    87
table(Matrix::rowSums(sc_sub@assays$RNA@counts) == 0)
## 
## FALSE  TRUE 
## 23820 13558
sc_sub <- sc_sub[Matrix::rowSums(sc_sub@assays$RNA@counts) != 0, ]

sp_obj <- sp_obj[Matrix::rowSums(sp_obj@assays$Spatial@counts) != 0, ]

0.5.4 Deconvolution

Run deconvolution using the scRNAseq and the spatial transcriptomics data

decon_mtrx_ls <- SPOTlight::spotlight_deconvolution(
  se_sc = sc_obj,
  counts_spatial = sp_obj@assays$Spatial@counts,
  clust_vr = clust_vr,
  cluster_markers = sc_markers,
  cl_n = 50,
  hvg = 3000,
  ntop = NULL,
  transf = "uv",
  method = "nsNMF",
  min_cont = 0,
  assay = "RNA",
  slot = "counts")

Change names to remove . by original name

# names_df <- data.frame(
#   plt_name = unique(sc_obj@meta.data[, clust_vr]),
#   ct_name = stringr::str_replace_all(
#     string = unique(sc_obj@meta.data[, clust_vr]),
#     pattern = "[:punct:]|[:blank:]|[+]", "."))
# 
# 
# cnames <- colnames(decon_mtrx_ls[[2]])
# new_names <- data.frame("ct_name" = cnames) %>%
#   dplyr::left_join(names_df, by = "ct_name") %>%
#   dplyr::pull("plt_name")

# colnames(decon_mtrx_ls[[2]]) <- new_names

0.5.5 Save deconvolution

"{fig1}/{robj_dir}/decon_mtrx_{clust_vr}.rds" %>%
  glue::glue() %>%
  here::here() %>%
  saveRDS(
    object = decon_mtrx_ls,
    file = .)

0.5.6 Assess deconvolution

Before even looking at the decomposed spots we can gain insight on how well the model performed by looking at the topic profiles for the cell types.

nmf_mod <- decon_mtrx_ls[[1]]
decon_mtrx <- decon_mtrx_ls[[2]]

rownames(decon_mtrx) <- colnames(sp_obj)

The first thing we can do is look at how specific the topic profiles are for each cell type.

h <- NMF::coef(nmf_mod[[1]])
rownames(h) <- paste("Topic", 1:nrow(h), sep = "_")
topic_profile_plts <- SPOTlight::dot_plot_profiles_fun(
  h = h,
  train_cell_clust = nmf_mod[[2]])

topic_profile_plts[[2]] + 
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 90), 
    axis.text = ggplot2::element_text(size = 12))

Next we can take a look at the how the individual topic profiles of each cell within each cell-type behave. Here we expect that all the cells from the same cell type show a similar topic profile distribution, if not there might be a bit more substructure in that cluster and we may only be capturing one or the other.

topic_profile_plts[[1]] +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 90), 
    axis.text = ggplot2::element_text(size = 12))

Lastly we can take a look at which genes are the most important for each topic and therefore get an insight into which genes are driving them.

basis_spotlight <- data.frame(NMF::basis(nmf_mod[[1]]))

colnames(basis_spotlight) <- glue::glue("Topic {1:length(unique(nmf_mod[[2]]))}")

DT::datatable(round(basis_spotlight, 5),
              filter = "top")

0.6 Session Info

sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
## 
## Matrix products: default
## BLAS/LAPACK: /scratch/groups/singlecell/software/anaconda3/envs/spatial_r/lib/libopenblasp-r0.3.12.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] tidyr_1.2.0         nnls_1.4            edgeR_3.32.1        limma_3.46.0        NMF_0.23.0          Biobase_2.50.0      BiocGenerics_0.36.1 cluster_2.1.2       rngtools_1.5.2      pkgmaker_0.32.2     registry_0.5-1      tibble_3.1.6        purrr_0.3.4         SPOTlight_0.1.7     Matrix_1.4-0        readr_2.1.2         stringr_1.4.0       dplyr_1.0.8         cowplot_1.1.1       ggpubr_0.4.0        ggplot2_3.3.5       SeuratObject_4.0.4  Seurat_4.1.0       
## 
## loaded via a namespace (and not attached):
##   [1] uuid_1.0-3            backports_1.4.1       systemfonts_1.0.4     plyr_1.8.6            igraph_1.2.11         lazyeval_0.2.2        splines_4.0.1         crosstalk_1.2.0       listenv_0.8.0         scattermore_0.8       gridBase_0.4-7        digest_0.6.29         foreach_1.5.2         htmltools_0.5.2       fansi_1.0.2           magrittr_2.0.2        doParallel_1.0.17     tensor_1.5            ROCR_1.0-11           tzdb_0.2.0            globals_0.14.0        matrixStats_0.61.0    officer_0.4.1         vroom_1.5.7           spatstat.sparse_2.1-0 colorspace_2.0-3      ggrepel_0.9.1         xfun_0.30             crayon_1.5.0          jsonlite_1.8.0        spatstat.data_2.1-2   iterators_1.0.14      survival_3.3-1        zoo_1.8-9             glue_1.6.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.9          car_3.0-12            future.apply_1.8.1    abind_1.4-5           scales_1.1.1          DBI_1.1.2             rstatix_0.7.0         spatstat.random_2.1-0 miniUI_0.1.1.1        Rcpp_1.0.8            viridisLite_0.4.0     xtable_1.8-4          reticulate_1.24       spatstat.core_2.4-0   bit_4.0.4             DT_0.21               htmlwidgets_1.5.4    
##  [55] httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3       sass_0.4.0            uwot_0.1.11           deldir_1.0-6          locfit_1.5-9.4        utf8_1.2.2            here_1.0.1            labeling_0.4.2        tidyselect_1.1.2      rlang_1.0.2           reshape2_1.4.4        later_1.3.0           munsell_0.5.0         tools_4.0.1           cli_3.2.0             generics_0.1.2        broom_0.7.12          ggridges_0.5.3        evaluate_0.15         fastmap_1.1.0         yaml_2.3.5            goftest_1.2-3         knitr_1.37            bit64_4.0.5           fitdistrplus_1.1-6    zip_2.2.0             RANN_2.6.1            pbapply_1.5-0         future_1.24.0         nlme_3.1-155          mime_0.12             xml2_1.3.3            compiler_4.0.1        plotly_4.10.0         png_0.1-7             ggsignif_0.6.3        spatstat.utils_2.3-0  bslib_0.3.1           stringi_1.7.6         highr_0.9             gdtools_0.2.4         lattice_0.20-45       vctrs_0.3.8           pillar_1.7.0          lifecycle_1.0.1       spatstat.geom_2.3-2   lmtest_0.9-39         jquerylib_0.1.4       RcppAnnoy_0.0.19     
## [109] data.table_1.14.2     irlba_2.3.5           flextable_0.7.0       httpuv_1.6.5          patchwork_1.1.1       R6_2.5.1              promises_1.2.0.1      KernSmooth_2.23-20    gridExtra_2.3         parallelly_1.30.0     codetools_0.2-18      MASS_7.3-55           assertthat_0.2.1      rprojroot_2.0.2       withr_2.5.0           sctransform_0.3.3     mgcv_1.8-39           hms_1.1.1             grid_4.0.1            rpart_4.1.16          rmarkdown_2.12        carData_3.0-5         Rtsne_0.15            shiny_1.7.1           base64enc_0.1-3